d3zoo <- zoo(d3, d3[,1])
d3zoo
d4 <- na.approx(d3zoo[,-1])
d4
d4 <- data.frame(cbind(d3zoo[,1], d4))
colnames(d4)[1] <- "SDATE"
d5 <- melt(d4, id = c('SDATE'), variable = 'Taxa', na.rm = T)
d6 <- d5 |>
mutate(Abundance = as.numeric(as.character(value)),
Week = as.numeric(strftime(SDATE, format = "%V")),
#  Week = as.numeric(strftime(SDATE, format= "%V")),
WeekAb = Abundance * Week,
# WAb = Abundance * Week,
Year = as.numeric(strftime(SDATE, format = "%Y")),
Station = i)
d6_end <- rbind(d6_end, d6)
rm(d2, d3, d3zoo, d4, d5, d6)
}
d6_end <- d6_end |>
dplyr::filter(Year > 2007,
Year < 2023,
Week < 52, Week >1)
unique(d6_end$Taxa)
################################################################################
# 8. And save the daily interpolated data
dseason <- d6_end |>
filter(Year > 2007) |>
mutate(Season = ifelse(Week >= 5 & Week <= 22,
"Spring bloom",
ifelse(Week > 22 & Week < 37,
"Summer bloom",
"Other")))
dseason1 <- dseason |>
filter(Season != "Other") |>
mutate(select = ifelse(Taxa == "Total",
"yes",
"no")) |>
filter(select == "yes") |>
mutate(Taxa = ifelse(Season == "Spring bloom",
"Spring_bloom",
"Summer_bloom"))
df_interpolated <- d6_end |>
filter(Year > 2007) |>
filter(Taxa != "Total") |>
mutate(Taxa = ifelse(Taxa == "Diatoms" & Week >= 5 & Week <= 22,
"Spring_Diatoms",
ifelse(Taxa == "Diatoms" & Week >= 37,
"Summer_Diatoms",
ifelse(Taxa == "Dinoflagellates" & Week >= 5 & Week <= 22,
"Dinoflagellates",
ifelse(Taxa == "Dinoflagellates" & Week > 22,
"Not_included_Dinoflagellates",
as.character(Taxa))))),
Season = "NA",
select = "yes") |>
filter(Taxa != "Diatoms",
Taxa != "Not_included_Dinoflagellates") |>
rbind(dseason1)
################################################################################
# 9. Peak Timing by center of gravity
SumAb <- df_interpolated |>
group_by(Year, Taxa, Station, Season)|>
summarise(Abundance = sum(Abundance))
SumWeekAb <- df_interpolated |>
group_by(Year, Taxa, Station, Season)|>
summarise(WeekAb = sum(WeekAb))
peak <- merge(SumAb,SumWeekAb) |>
mutate(Timing = round(WeekAb / Abundance)) |>
dplyr::select( - c(Abundance, WeekAb))
# Check the data
peak |>
ggplot(mapping = aes(y = Timing,
x = Taxa,
fill = Station)) +
geom_boxplot(position = position_dodge(1),
alpha = 0.1) +
geom_point(mapping = aes(group = Station),
color = "black",
shape = 21,
size = 1,
position = position_jitterdodge(jitter.width = .1,
dodge.width = 1)) +
theme_bw() +
coord_flip()+
labs(title = "Seasonal peak",
x = "Taxa")
################################################################################
# 10. Bloom Duration
d9 <- df_interpolated |>
filter(Taxa != "Diatoms") |>
#filter(Season == "Growing_season") |>
group_by(Taxa, Year, Station) |>
mutate(cumab = cumsum(Abundance))
CsumYr <- d9 |>
group_by(Year, Taxa, Station) |>
summarise(max.value = max(cumab))
d10 <- merge(CsumYr, d9, all = T) |>
transform(Quart = round(cumab / max.value * 100, digits = 0))
rm(d9, CsumYr)
d10 |>
group_by(Taxa, Week, Station) |>
mutate(Avg = mean(Quart,
na.rm = T)) |>
ungroup()|>
ggplot(mapping = aes(x = Week,
y = Quart,
col = as.character(Year))) +
geom_line(alpha = 0.5) +
geom_line(mapping = aes(y = Avg),
col = "black") +
facet_grid(Taxa ~ Station) +
geom_hline(mapping = aes(yintercept = 25),
lty = 2,
col = "grey50") +
geom_hline(mapping = aes(yintercept = 75),
lty = 2,
col = "grey50") +
theme(legend.position = "none")
###########################
# Interpolation of the quartile
# Beacuse some do not have a value for the 25 and 75th quartile
allQuart <- data.frame(Quart = seq(0, 100, 1))
d6_end_2 = data.frame()
for (i in unique(d10$Station)) {
for(y in unique(d10$Year)){
for(t in unique(d10$Taxa)){
d2 <- d10 |>
filter(Station == i,
Year == y,
Taxa == t) |>
dplyr::select(Week, Quart)
# daily time series
d3 <- merge(
allQuart,
d2,
by = "Quart",
all.x = TRUE)
# Interpolation
d4 <- na.approx(d3)
d4 <- data.frame(cbind(d4))
colnames(d4)[1] <- "Quart_approx"
d4$Station = i
d4$Year = y
d4$Taxa = t
d6_end_2 <- rbind(d6_end_2, d4)
rm(d2, d3, d4)
}
}
}
d10 <- merge(d10, d6_end_2,
by = c("Station", "Year", "Taxa", "Week"),
all.y = T)
rm(d6_end_2)
day25 <- d10[which(d10$Quart_approx == 25),] |>
group_by(Year, Taxa, Station) |>
summarise(D25 = mean(Week))
day75 <- d10[which(d10$Quart_approx == 75),] |>
group_by(Year, Taxa, Station)|>
summarise(D75 = mean(Week))
day50 <- d10[which(d10$Quart_approx == 50),] |>
group_by(Year, Taxa, Station)|>
summarise(Middle = mean(Week))
duration <- day25 |>
merge(day75,
by = c("Taxa", "Year", "Station")) |>
merge(day50,
by = c("Taxa", "Year", "Station")) |>
transform(Dur = D75 - D25)
################################################################################
# 11. Bloom Magnitude
magnitude <- df_interpolated |>
dplyr::select( - Season)|>
merge(duration,
by=c("Taxa", "Year", "Station")) |>
filter(Week >= D25) |>
filter(Week <= D75) |>
group_by(Station, Taxa, Year) |>
summarise(Max = max(Abundance),
Avg = mean(Abundance,
na.rm=T)) |>
as.data.frame()
# 12. Merge all and empty the environment
Full_pp <- duration |>
merge(peak,
by = c("Year", "Taxa", "Station")) |>
merge(magnitude,
by = c("Year", "Taxa", "Station")) |>
mutate(Group = "Phytoplankton") |>
dplyr::select(Taxa,
Year,
Station,
Group,
Timing,
Max,
Avg,
D25,
D75,
Middle,
Dur)
names(Full_pp) <- c("Taxa",
"Year",
"Station",
"Group",
"Timing",
"Maximum",
"Magnitude",
"Start",
"End",
"Middle",
"Duration")
Full_pp |>
mutate(Taxa = ifelse(Taxa == "Summer_Diatoms", "Fall_Diatoms", as.character(Taxa))) |>
write.table("Output/Data/Weekly_Subsampled_Full_PP.txt",
sep = ";")
rm(list=ls())
################################################################################
## Subsampling Zooplankton
zp <- readRDS("Output/Data/zooplankton_sampling_before_interpolation.rds")
# Add a new column for the week
df <- zp %>%
mutate(Week = week(SDATE),
Year = year(SDATE),
Y_W = paste(Year, Week, sep ="_"))
# Select the same month as for the phytoplankton
month <- readRDS("Output/Data/Week_Year_selected.rds")
d1 <- df |>
filter(Y_W %in% month$Y_W,
Station == "BY31") |>
dplyr::select(-c(Week, Year, Y_W))
# Print the result
d1 |>
dplyr::select(Station, SDATE) |>
unique() |>
mutate(Year = year(SDATE), Month = month(SDATE)) |>
group_by(Year, Month, Station) |>
summarise(N = n()) %T>%
#Save data for panel a
write_rds("Output/Data/Sup_subsampled_zp.rds")%>%
ggplot(aes(x=as.factor(Year), y = as.factor(Month), fill = N))+
geom_tile()+facet_grid(Station~.)
################################################################################
# Continue the analysis from PhenologyZP.R Line 368
################################################################################
allDates <- seq.Date(
min(as.Date(d1$SDATE)),
max(as.Date(d1$SDATE)),
"week")
d6_end = data.frame()
for (i in unique(d1$Station)) {
d2 <- d1 |>
dplyr::filter(Station == i) |>
dplyr::select( - Station) |>
spread(Taxa, Value)
d2
# daily time series
d3 <- merge(
x = data.frame(SDATE = allDates),
y = data.frame(d2),
all.x = TRUE)
d3
#if (i == "BY15") {d3 <- d3 |> slice(14:n())}
# Interpolation
# Interpolation
d3zoo <- zoo(d3, d3[,1])
d3zoo
d4 <- na.approx(d3zoo[,-1])
d4
d4 <- data.frame(cbind(d3zoo[,1], d4))
colnames(d4)[1] <- "SDATE"
d5 <- melt(d4, id = c('SDATE'), variable = 'Taxa', na.rm = T)
d6 <- d5 |>
mutate(Abundance = as.numeric(as.character(value)),
Week = as.numeric(strftime(SDATE, format = "%V")),
#  Week = as.numeric(strftime(SDATE, format= "%V")),
WeekAb = Abundance * Week,
# WAb = Abundance * Week,
Year = as.numeric(strftime(SDATE, format = "%Y")),
Station = i)
d6_end <- rbind(d6_end, d6)
rm(d2, d3, d3zoo, d4, d5, d6)
}
df_interpolated <- d6_end |>
filter(Year > 2007,
Year < 2023,
Week < 52, Week >1)
rm(d6_end, d1)
################################################################################
# 9. Bloom timing Center of gravity
SumAb <- df_interpolated |>
group_by(Year, Taxa, Station) |>
summarise(Abundance = sum(Abundance))
SumWeekAb <- df_interpolated |>
group_by(Year, Taxa, Station) |>
summarise(WeekAb = sum(WeekAb))
d8 <- merge(SumAb,
SumWeekAb)
peak <- d8 |>
mutate(Timing = round(WeekAb/Abundance),
Group = ifelse(Taxa %in% c("Temora",
"Centropages",
"Eurytemora",
"Acartia",
"Pseudocalanus",
"Copepoda"),
"Copepoda",
ifelse(Taxa %in% c("Bosmina",
"Evadne",
"Podon",
"Cladocera"),
"Cladocera",
"Rotatoria"))) |>
dplyr::select(Year,
Taxa,
Station,
Timing,
Group)
rm(d8, SumAb, SumWeekAb)
################################################################################
# 10. Bloom Duration
d9 <- df_interpolated |>
group_by(Taxa, Year, Station) |>
mutate(cumab = cumsum(Abundance))
CsumYr <- d9 |>
group_by(Year, Taxa, Station) |>
summarise(max.value = max(cumab))
d10 <- merge(CsumYr, d9, all = T) |>
transform(Quart = round(cumab / max.value * 100,
digits = 0))
d10 |>
group_by(Taxa, Week, Station) |>
mutate(Avg = mean(Quart, na.rm = T)) |>
ungroup()|>
ggplot(aes(x = Week, y = Quart, col = as.character(Year))) +
geom_line(alpha = 0.5) +
geom_line(aes(y = Avg), col = "black") +
facet_grid(Taxa ~ Station) +
theme_zp +
geom_hline(mapping = aes(yintercept = 25), lty = 2, col = "grey50") +
geom_hline(mapping = aes(yintercept = 75), lty = 2, col = "grey50") +
theme(legend.position = "none")
d10 |>
group_by(Taxa, Week, Station) |>
mutate(Avg = mean(Quart, na.rm = T)) |>
ungroup()|>
ggplot(aes(x = Week, y = Quart, col = as.character(Year))) +
geom_line(alpha = 0.5) +
geom_line(aes(y = Avg), col = "black") +
facet_grid(Taxa ~ Station) +
geom_hline(mapping = aes(yintercept = 25), lty = 2, col = "grey50") +
geom_hline(mapping = aes(yintercept = 75), lty = 2, col = "grey50") +
theme(legend.position = "none")
# 10.a. Interpolation of the quartile
# because some do not have a value for the 25 and 75th quartile
allQuart <- data.frame(Quart = seq(0, 100, 1))
d6_end_2 = data.frame()
for (i in unique(d10$Station)) {
for(y in unique(d10$Year)){
for(t in unique(d10$Taxa)){
d2 <- d10 |>
filter(Station == i,
Year == y,
Taxa == t) |>
dplyr::select(Week, Quart)
d3 <- merge(
allQuart,
d2,
by = "Quart",
all.x = TRUE)
# Interpolation
d4 <- na.approx(d3)
d4 <- data.frame(cbind(d4))
colnames(d4)[1] <- "Quart_approx"
d4$Station = i
d4$Year = y
d4$Taxa = t
d6_end_2 <- rbind(d6_end_2, d4)
rm(d2, d3, d4)
}
}
}
d10 <- merge(d10, d6_end_2,
by = c("Station", "Year", "Taxa", "Week"),
all.y = T)
day25 <- d10[which(d10$Quart_approx == 25),] |>
group_by(Year, Taxa, Station) |>
summarise(D25 = mean(Week))
day75 <- d10[which(d10$Quart_approx == 75),] |>
group_by(Year, Taxa, Station)|>
summarise(D75 = mean(Week))
day50 <- d10[which(d10$Quart_approx == 50),] |>
group_by(Year, Taxa, Station)|>
summarise(middle = mean(Week))
duration <- merge(day25, day75, by = c("Taxa", "Year", "Station")) |>
merge(day50, by=c("Taxa", "Year", "Station")) |>
transform(Dur = D75 - D25)|>
mutate(Group= ifelse(Taxa %in% c("Temora",
"Centropages",
"Eurytemora",
"Pseudocalanus",
"Acartia", "Copepoda"),
"Copepoda",
ifelse(Taxa %in% c("Bosmina",
"Evadne",
"Podon", "Cladocera"),
"Cladocera",
"Rotatoria")))
rm(day75, day50, day25, d10)
rm(allQuart, CsumYr, d9)
################################################################################
# 11. Magnitude
magnitude <- df_interpolated |>
merge(duration,
by = c("Taxa", "Year", "Station")) |>
filter(Week >= D25) |>
filter(Week <= D75) |>
group_by(Station, Taxa, Year) |>
summarise(Max = max(Abundance),
Avg = mean(Abundance,
na.rm=T))|>
as.data.frame()
################################################################################
# 12. Merge all phenoloy variables
ZP <- peak |>
merge(magnitude,
by = c("Station", "Taxa", "Year")) |>
merge(duration,
by = c("Taxa", "Year", "Station", "Group"))
names(ZP) <- c("Taxa",
"Year",
"Station",
"Group",
"Timing",
"Maximum",
"Magnitude",
"Start",
"End",
"Middle",
"Duration")
ZP |>
write.table("Output/Data/Weekly_Subsampled_Full_ZP.txt",
sep = ";")
rm(list=ls())
################################################################################
# Analysis --------
################################################################################
# Sampling frequency
sup1a <- readRDS("Output/Data/Sup_subsampled_zp.rds")|>
mutate(facet = "Zooplankton")
sup1b <- readRDS("Output/Data/Sup_subsampled_pp.rds")|>
mutate(facet = "Phytoplankton")
sup1a |> rbind(sup1b) |>
mutate(Month = case_when(Month == 1 ~ "Jan",
Month == 2 ~ "Feb",
Month == 3 ~ "Mar",
Month == 4 ~ "Apr",
Month == 5 ~ "May",
Month == 6 ~ "Jun",
Month == 7 ~ "Jul",
Month == 8 ~ "Aug",
Month == 9 ~ "Sep",
Month == 10 ~ "Oct",
Month == 11 ~ "Nov",
Month == 12 ~ "Dec"),
Month = factor(Month,
levels = c("Dec",
"Nov",
"Oct",
"Sep",
"Aug",
"Jul",
"Jun",
"May",
"Apr",
"Mar",
"Feb",
"Jan")),
facet = factor(facet, levels= c("Zooplankton", "Phytoplankton"))) |>
ggplot(mapping = aes(y = Month,
x = Year,
fill = as.factor(N))) +
geom_tile(col = 1) +
labs(x = NULL,
y = NULL) +
coord_fixed()+
scale_x_continuous(breaks = seq(2008,2021,2))+
scale_fill_manual(values = c("#edf8fb",
"#b3cde3",
"#8c96c6",
"#88419d")) +
facet_grid(Station~facet) +
theme_classic()+
theme(panel.background = element_blank(),
strip.background = element_blank(),
axis.line = element_blank(),
axis.ticks = element_blank(),
axis.text = element_text(color = "black",
size = 13),
axis.text.x = element_text(angle = 90,
vjust = 0.5),
strip.text = element_text(size = 15,
color = "black"))
ggsave("Output/Figures/Sup_Fig_S6_ab.pdf", width = 7, height = 6)
# relationship between subsampled dataset and complete dataset
Subsampled_zp <-
read.delim("Output/Data/Weekly_Subsampled_Full_ZP.txt", sep =";") |>
dplyr::select(Taxa, Year, Timing, Magnitude, Start, End, Station, Group)
names(Subsampled_zp) <-
c("Taxa", "Year", "sub_Timing", "sub_Magnitude", "sub_Start", "sub_End", "Station", "Group")
source("Scripts/DifferenceStations.R")
source("Scripts/Nauplii.R")
source("Scripts/FreqPhytoProp.R")
source("Scripts/Validation_Trends.R")
